import urllib.request
import re
from lxml import etree
# Target site and a desktop-browser User-Agent so the server does not
# reject the request as an obvious bot.
base_url = 'http://117.73.11.244:9090'
headers = {
    'User-Agent':'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
request = urllib.request.Request(base_url,headers=headers)
# Fix: the original never closed the HTTP response, leaking the socket.
# The context manager guarantees it is closed once the body is read.
with urllib.request.urlopen(request) as page:
    # Decode as UTF-8, dropping undecodable bytes; lower-case so the
    # case-insensitive regexes below see uniform input.
    content = page.read().decode("utf-8", "ignore").lower()
# Regexes for stripping markup.  Fix: the originals were non-raw strings
# containing the invalid escape sequences "\<" and "\>" (a
# DeprecationWarning, later SyntaxWarning, in modern Python).  Raw strings
# without the pointless backslashes compile to the exact same patterns,
# since "\<" and "<" match identically in a regex.
re_tag = re.compile(r'<[\S\s]+?>', re.I)                    # any HTML tag
re_script = re.compile(r'<script[\S\s]+?</script>', re.I)   # <script>...</script> blocks
re_style = re.compile(r'<style[\S\s]+?</style>', re.I)      # <style>...</style> blocks
# Order matters: remove script/style bodies first, then every remaining tag.
content = re_script.sub('', content)  # drop scripts
content = re_style.sub('', content)   # drop styles
content = re_tag.sub('', content)     # drop all remaining angle-bracket markup

# NOTE(review): all tags were stripped above, so etree.HTML is parsing what
# is essentially plain text; menu_items is presumably the wrapped body
# element rather than real menu nodes — confirm this is intentional.
selector = etree.HTML(content.encode("utf-8", 'ignore'))
menu_items = selector.xpath("/html/body")  # 5
def writefile(filename, content):
    """Append *content* plus a trailing newline to *filename*.

    Returns True on success, False when the file cannot be written.
    (The original returned None on success and False on failure, and
    callers in this file ignore the return value, so this is
    backward-compatible.)
    """
    try:
        # Context manager guarantees the handle is closed even if the
        # write raises (the original leaked it in that case); explicit
        # UTF-8 matches the decoding done above and avoids a silent
        # UnicodeEncodeError on platforms with a narrow default encoding.
        with open(filename, 'a', encoding='utf-8') as fp:
            fp.write(content + "\n")
        return True
    except OSError:
        # Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit and programming errors.
        return False
# Append the scraped, tag-stripped page text to the result file.
writefile("crawler_result.csv", content)  # 2